Core microbiome
See also related functions for the analysis of rare and variable taxa (noncore_members; noncore_abundance; rare_members; rare_abundance; low_abundance).
HITChip Data
Load example data:
# Load data
library(microbiome)
data(peerj32)
# Rename the data
pseq <- peerj32$phyloseq
# Calculate compositional version of the data
# (relative abundances)
pseq.rel <- microbiome::transform(pseq, "compositional")Prevalence of taxonomic groups
Relative population frequencies; at 1% compositional abundance threshold:
## Roseburia intestinalis et rel. Eubacterium hallii et rel.
## 1.0000000 1.0000000
## Clostridium nexile et rel. Ruminococcus obeum et rel.
## 1.0000000 0.9772727
## Coprococcus eutactus et rel. Ruminococcus lactaris et rel.
## 0.9772727 0.9545455
Absolute population frequencies (sample count):
## Roseburia intestinalis et rel. Eubacterium hallii et rel.
## 44 44
## Clostridium nexile et rel. Ruminococcus obeum et rel.
## 44 43
## Coprococcus eutactus et rel. Ruminococcus lactaris et rel.
## 43 42
Core microbiota analysis
If you only need the names of the core taxa, do as follows. This returns the taxa that exceed the given prevalence and detection thresholds.
A full phyloseq object of the core microbiota is obtained as follows:
Retrieving the associated taxa names from the phyloseq object:
Core abundance and diversity
Total core abundance in each sample (sum of abundances of the core members):
Core visualization
Core line plots
Determine core microbiota across various abundance/prevalence thresholds with the blanket analysis (Salonen et al. CMI, 2012) based on various signal and prevalences.
# With compositional (relative) abundances
det <- c(0, 0.1, 0.5, 2, 5, 20)/100
prevalences <- seq(.05, 1, .05)
plot_core(pseq.rel, prevalences = prevalences, detections = det, plot.type = "lineplot") + xlab("Relative Abundance (%)")Core heatmaps
This visualization method has been used for instance in Intestinal microbiome landscaping: Insight in community assemblage and implications for microbial modulation strategies. Shetty et al. FEMS Microbiology Reviews fuw045, 2017.
Note that you can order the taxa on the heatmap with the taxa.order argument.
<<<<<<< HEAD# Core with compositionals:
prevalences <- seq(.05, 1, .05)
detections <- 10^seq(log10(1e-3), log10(.2), length = 10)
# Also define gray color palette
gray <- gray(seq(0,1,length=5))
p <- plot_core(pseq.rel, plot.type = "heatmap", colours = gray,
prevalences = prevalences, detections = detections) +
xlab("Detection Threshold (Relative Abundance (%))")
print(p)
# Core with absolute counts and horizontal view:
# and minimum population prevalence (given as percentage)
detections <- 10^seq(log10(1), log10(max(abundances(pseq))/10), length = 10)
library(RColorBrewer)
p <- plot_core(pseq, plot.type = "heatmap",
prevalences = prevalences,
detections = detections,
colours = rev(brewer.pal(5, "Spectral")),
min.prevalence = .2, horizontal = TRUE)
print(p)# Core with compositionals:
prevalences <- seq(.05, 1, .05)
detections <- 10^seq(log10(1e-3), log10(.2), length = 10)
# Also define gray color palette
gray <- gray(seq(0,1,length=5))
p <- plot_core(pseq.rel, plot.type = "heatmap", colours = gray,
prevalences = prevalences, detections = detections) +
xlab("Detection Threshold (Relative Abundance (%))")
print(p)
# Core with absolute counts and horizontal view:
# and minimum population prevalence (given as percentage)
detections <- 10^seq(log10(1), log10(max(abundances(pseq))/10), length = 10)
library(RColorBrewer)
p <- plot_core(pseq, plot.type = "heatmap",
prevalences = prevalences,
detections = detections,
colours = rev(brewer.pal(5, "Spectral")),
min.prevalence = .2, horizontal = TRUE)
print(p)# Core with compositionals:
prevalences <- seq(.05, 1, .05)
detections <- 10^seq(log10(1e-3), log10(.2), length = 10)
# Also define gray color palette
gray <- gray(seq(0,1,length=5))
p <- plot_core(pseq.rel, plot.type = "heatmap", colours = gray,
prevalences = prevalences, detections = detections) +
xlab("Detection Threshold (Relative Abundance (%))")
print(p)
# Core with absolute counts and horizontal view:
# and minimum population prevalence (given as percentage)
detections <- 10^seq(log10(1), log10(max(abundances(pseq))/10), length = 10)
library(RColorBrewer)
p <- plot_core(pseq, plot.type = "heatmap",
prevalences = prevalences,
detections = detections,
colours = rev(brewer.pal(5, "Spectral")),
min.prevalence = .2, horizontal = TRUE)
print(p)Core Microbiota using Amplicon data
Make phyloseq object
This tutorial is useful for analysis of output files from (Mothur), (QIIME or QIIME2) or any tool that gives a biom file as output. There is also a simple way to read comma seperated (*.csv) files.
Simple comma seperated files:
library(microbiome)
otu.file <-
system.file("extdata/qiita1629_otu_table.csv",
package='microbiome')
tax.file <- system.file("extdata/qiita1629_taxonomy_table.csv",
package='microbiome')
meta.file <- system.file("extdata/qiita1629_mapping_subset.csv",
package='microbiome')
pseq.csv <- read_phyloseq(
otu.file=otu.file,
taxonomy.file=tax.file,
metadata.file=meta.file, type = "simple")Biom file:
# Read the biom file
biom.file <-
system.file("extdata/qiita1629.biom",
package = "microbiome")
# Read the mapping/metadata file
meta.file <-
system.file("extdata/qiita1629_mapping.csv",
package = "microbiome")
# Make phyloseq object
pseq.biom <- read_phyloseq(otu.file = biom.file,
metadata.file = meta.file,
taxonomy.file = NULL, type = "biom")Mothur shared OTUs and Consensus Taxonomy:
otu.file <- system.file(
"extdata/Baxter_FITs_Microbiome_2016_fit.final.tx.1.subsample.shared",
package='microbiome')
tax.file <- system.file(
"extdata/Baxter_FITs_Microbiome_2016_fit.final.tx.1.cons.taxonomy",
package='microbiome')
meta.file <- system.file(
"extdata/Baxter_FITs_Microbiome_2016_mapping.csv",
package='microbiome')
pseq.mothur <- read_phyloseq(otu.file=otu.file,
taxonomy.file =tax.file,
metadata.file=meta.file, type = "mothur")
print(pseq.mothur)Now, we proceed to core microbiota analysis.
Core microbiota analysis
Here the data from (Halfvarson et al. Nature Microbiology 2, 2017) will be used and only healthy samples will be included.
# check the data
print(pseq.biom)
# Filter the data to include only healthy subjects
pseq.1 <- subset_samples(pseq.biom, ibd_subtype == "HC" & timepoint == "1")
print(pseq.1)
# keep only taxa with positive sums
pseq.2 <- prune_taxa(taxa_sums(pseq.1) > 0, pseq.1)
print(pseq.2)
# Calculate compositional version of the data
# (relative abundances)
pseq.rel <- microbiome::transform(pseq.2, "compositional")Prevalence of taxonomic groups
Relative population frequencies; at 1% compositional abundance threshold:
We can see that only OTU ids are listed with no taxonomic information. Absolute population frequencies (sample count):
Core microbiota analysis
If you only need the names of the core taxa, do as follows. This returns the taxa that exceed the given prevalence and detection thresholds.
A full phyloseq object of the core microbiota is obtained as follows:
Retrieving the associated taxa names from the phyloseq object:
core.taxa <- taxa(pseq.core)
class(core.taxa)
# get the taxonomy data
tax.mat <- tax_table(pseq.core)
tax.df <- as.data.frame(tax.mat)
# add the OTus to last column
tax.df$OTU <- rownames(tax.df)
# select taxonomy of only
# those OTUs that are core memebers based on the thresholds that were used.
core.taxa.class <- dplyr::filter(tax.df, rownames(tax.df) %in% core.taxa)
knitr::kable(head(core.taxa.class))Core abundance and diversity
Total core abundance in each sample (sum of abundances of the core members):
Core visualization
Core line plots
Determine core microbiota across various abundance/prevalence thresholds with the blanket analysis (Salonen et al. CMI, 2012) based on various signal and prevalences.
Core heatmaps
This visualization method has been used for instance in Intestinal microbiome landscaping: Insight in community assemblage and implications for microbial modulation strategies. Shetty et al. FEMS Microbiology Reviews fuw045, 2017.
Note that you can order the taxa on the heatmap with the order.taxa argument.
# Core with compositionals:
prevalences <- seq(.05, 1, .05)
detections <- 10^seq(log10(1e-5), log10(.2), length = 10)
# Also define gray color palette
gray <- gray(seq(0,1,length=5))
p <- plot_core(pseq.rel, plot.type = "heatmap", colours = gray,
prevalences = prevalences, detections = detections, min.prevalence = .5) +
xlab("Detection Threshold (Relative Abundance (%))")
print(p)
# Same with the viridis color palette
# color-blind friendly and uniform
# options: viridis, magma, plasma, inferno
# https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html
# Also discrete=TRUE versions available
library(viridis)
print(p + scale_fill_viridis())As it can be seen, we see only OTu IDs and this may not be useful to interpret the data. We need to repreoccess this figure to include taxonomic information. We can do this as follows:
library(RColorBrewer)
library(knitr)
# Core with absolute counts and vertical view:
# and minimum population prevalence (given as percentage)
detections <- 10^seq(log10(1), log10(max(abundances(pseq.2))/10), length = 10)
healthycore <- plot_core(pseq.2, plot.type = "heatmap",
prevalences = prevalences,
detections = detections,
colours = rev(brewer.pal(5, "Spectral")),
min.prevalence = .90, horizontal = F)
# get the data used for plotting
df <- healthycore$data
# get the list of OTUs
list <- df$Taxa
# check the OTU ids
# print(list)
# get the taxonomy data
tax <- tax_table(pseq.2)
tax <- as.data.frame(tax)
# add the OTus to last column
tax$OTU <- rownames(tax)
# select taxonomy of only
# those OTUs that are used in the plot
tax2 <- dplyr::filter(tax, rownames(tax) %in% list)
# head(tax2)
# We will merege all the column into one except the Doamin as all is bacteria in this case
tax.unit <- tidyr::unite(tax2, Taxa_level,c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species", "OTU"), sep = "_;", remove = TRUE)
tax.unit$Taxa_level <- gsub(pattern="[a-z]__",replacement="", tax.unit$Taxa_level)
# add this new information into the plot data df
df$Taxa <- tax.unit$Taxa_level
# you can see now we have the taxonomic information
knitr::kable(head(df))
# replace the data in the plot object
healthycore$data <- df
plot(healthycore + theme(axis.text.y = element_text(face="italic")))